!ls
sample_data
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!kaggle competitions download -c dogs-vs-cats
Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /root/.kaggle/kaggle.json' Downloading dogs-vs-cats.zip to /content 98% 795M/812M [00:06<00:00, 177MB/s] 100% 812M/812M [00:06<00:00, 134MB/s]
import zipfile
# Extract the outer Kaggle archive, then the train.zip it contains.
# FIX: use context managers so the zip files are closed even if
# extractall() raises partway through (the original leaked the handles
# on error).
with zipfile.ZipFile('/content/dogs-vs-cats.zip', 'r') as zip_ref_1:
    zip_ref_1.extractall('/content')
with zipfile.ZipFile('/content/train.zip', 'r') as zip_ref_2:
    zip_ref_2.extractall('/content')
# Importing libraries for numerical operations and data manipulation
import numpy as np
import pandas as pd
import random
random.seed(42) # Setting random seed for reproducibility
# Importing libraries for file handling and system operations
import os, shutil, pathlib
# Importing libraries for data visualization
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objs as go
from PIL import Image
# Importing libraries for deep learning
from tensorflow import keras
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.models import load_model
from tensorflow.python.keras.models import Sequential
# Importing libraries for machine learning model evaluation
from sklearn.metrics import confusion_matrix, classification_report, precision_recall_curve
# Importing specific components from Keras
from keras.layers import Dense, Conv2D, Flatten, Dropout
# Setting up offline mode for Plotly
plotly.offline.init_notebook_mode()
# Ignoring warnings
import warnings
warnings.filterwarnings("ignore")
# Paths to the original dataset
# original_dir: raw Kaggle images named "<category>.<index>.jpg"
original_dir = pathlib.Path("./train/train/")
# data_folder: root under which the train/validation/test subsets are created
data_folder = pathlib.Path("./train/kaggle_dogs_vs_cats_small")
# Function to create a subset of the dataset with a specified range of files
def make_subset(subset_name, start_index, end_index, src_dir=None, dst_root=None):
    """Copy a contiguous index range of cat/dog images into a subset folder.

    Args:
        subset_name: Name of the subset, e.g. "train", "validation" or "test".
        start_index: First image index per category (inclusive).
        end_index: Last image index per category (exclusive).
        src_dir: Source directory containing "<category>.<i>.jpg" files.
            Defaults to the module-level ``original_dir``.
        dst_root: Root directory the subset folders are created under.
            Defaults to the module-level ``data_folder``.
    """
    source = pathlib.Path(src_dir) if src_dir is not None else original_dir
    root = pathlib.Path(dst_root) if dst_root is not None else data_folder
    for category in ("cat", "dog"):
        # Destination folder for this category within the subset.
        # (Renamed from `dir`, which shadowed the builtin.)
        target = root / subset_name / category
        target.mkdir(parents=True, exist_ok=True)
        for i in range(start_index, end_index):
            fname = f"{category}.{i}.jpg"
            shutil.copyfile(src=source / fname, dst=target / fname)
# Total number of files per category (make_subset copies this many cats
# AND this many dogs, so the on-disk totals are twice these numbers)
total_files = 4000
# Calculate the number of files for validation and test subsets (15% each)
validation_size = int(0.15 * total_files)
test_size = int(0.15 * total_files)
# Calculate the number of files for the training subset (the remaining 70%)
train_size = total_files - validation_size - test_size
# Create subsets for training, validation, and testing with contiguous,
# non-overlapping index ranges per category
make_subset("train", start_index=0, end_index=train_size)
make_subset("validation", start_index=train_size, end_index=train_size + validation_size)
make_subset("test", start_index=train_size + validation_size, end_index=train_size + validation_size + test_size)
num_samples = 9 # Images to plot
# List all image filenames
image_filenames = os.listdir(original_dir)
# Shuffle the list of image filenames randomly (seeded above for reproducibility)
random.shuffle(image_filenames)
# Plotting the images
plt.figure(figsize=(12, 24))
for index in range(num_samples):
    # Get the filename of the image at the current index
    filename = image_filenames[index]
    # Determine whether the image is of a cat or a dog based on the filename
    category = "cat" if "cat" in filename else "dog"
    # Load the image with a target size of (150, 150) (adjust target_size as needed)
    img_path = os.path.join(original_dir, filename)
    img = load_img(img_path, target_size=(150, 150))
    # Create a subplot and plot the image, labeled with its filename and class
    plt.subplot(6, 3, index + 1)
    plt.imshow(img)
    plt.xlabel(filename + ' (' + category + ')' )
plt.tight_layout()
plt.show()
# Build a DataFrame mapping each image in the original directory to its
# category ("cat" or "dog"), inferred from the filename prefix before the
# first dot (e.g. "dog.10840.jpg" -> "dog").
filenames = os.listdir(original_dir)
categories = ["dog" if name.split('.')[0] == 'dog' else "cat" for name in filenames]
df = pd.DataFrame({
    'filename': filenames,
    'category': categories
})
df.head()
| filename | category | |
|---|---|---|
| 0 | cat.5184.jpg | cat |
| 1 | cat.8460.jpg | cat |
| 2 | cat.8791.jpg | cat |
| 3 | cat.3757.jpg | cat |
| 4 | dog.10840.jpg | dog |
# Paths to the data folder
data_folder = pathlib.Path("./train/kaggle_dogs_vs_cats_small")
train_dir = data_folder / "train"
# BUG FIX: train_dir contains the category folders "cat" and "dog", not image
# files, so os.listdir(train_dir) returned just ['cat', 'dog'] (visible in the
# original head() output). Walk each category folder to collect the actual
# image filenames instead.
filenames = []
for category_dir in sorted(p for p in train_dir.iterdir() if p.is_dir()):
    filenames.extend(sorted(os.listdir(category_dir)))
# Extract labels from filenames (filenames are 'cat.xxx.jpg' or 'dog.xxx.jpg',
# so the first three characters are the class name)
labels = [str(x)[:3] for x in filenames]
# Create a DataFrame using filenames and labels
train_df = pd.DataFrame({'filename': filenames, 'label': labels})
train_df.head()
| filename | label | |
|---|---|---|
| 0 | dog | dog |
| 1 | cat | cat |
def count_files(data_folder, subset_names, categories):
    """
    Count the number of files in each category within each subset.
    Args:
    - data_folder (str): The path to the data folder.
    - subset_names (list): Names of data subsets.
    - categories (list): Categories within each data subset.
    Returns:
    - counts (list of lists): One row per subset; each row holds the file
      count for every category, in the order given.
    """
    return [
        [
            len(os.listdir(os.path.join(data_folder, subset_name, category)))
            for category in categories
        ]
        for subset_name in subset_names
    ]
def create_plot(subset_names, categories, counts):
    """
    Create a grouped bar plot of the distribution of data subsets.
    Args:
    - subset_names (list): Names of data subsets.
    - categories (list): Categories within each data subset.
    - counts (list of lists): Per-subset counts for each category.
    """
    # One bar trace per category; y pulls that category's column out of
    # the per-subset count rows.
    bars = [
        go.Bar(x=subset_names, y=[row[idx] for row in counts], name=cat)
        for idx, cat in enumerate(categories)
    ]
    # Assemble the figure with grouped bars and axis titles, then render it.
    fig = go.Figure(
        data=bars,
        layout=go.Layout(
            title='Distribution of Data Subset',
            xaxis=dict(title='Subset'),
            yaxis=dict(title='Number of Files'),
            barmode='group'
        ),
    )
    fig.show()
# Subset folders and class labels to summarize
subset_names = ["train", "validation", "test"]
categories = ["cat", "dog"]
# Count files per category in each subset
counts = count_files(data_folder, subset_names, categories)
# Create and display plot
create_plot(subset_names, categories, counts)
import os
import plotly.graph_objs as go
from plotly.subplots import make_subplots
def create_bar_chart(subset_name, categories, counts):
    """
    Build a bar trace for one data subset.
    Args:
    - subset_name (str): Name of the data subset (used as the trace name).
    - categories (list): Categories within the data subset (x axis).
    - counts (list): Counts for each category within the subset (y axis).
    Returns:
    - go.Bar: the configured bar trace.
    """
    return go.Bar(x=categories, y=counts, name=subset_name)
# Example usage
subset_names = ["train", "validation", "test"]
categories = ["cat", "dog"]
# Count files per category in each subset
counts = count_files(data_folder, subset_names, categories)
# Create one subplot per subset, titled with the subset name
fig = make_subplots(rows=1, cols=len(subset_names), subplot_titles=subset_names)
# Add bar charts to subplots (subplot columns are 1-indexed, hence start=1)
for i, subset_name in enumerate(subset_names, start=1):
    bars = create_bar_chart(subset_name, categories, counts[i-1])
    fig.add_trace(bars, row=1, col=i)
# Update layout
fig.update_layout(
    title='Distribution of Data Subsets (Bar Charts)',
    showlegend=True
)
# Display the plot
fig.show()
# Define paths to one sample cat image and one sample dog image
cat_img_path = data_folder / "train" / "cat" / "cat.30.jpg" # Path to cat image
dog_img_path = data_folder / "train" / "dog" / "dog.20.jpg" # Path to dog image
# Create a 1x2 grid of subplots for a side-by-side sample of each class
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
# Plot cat image
cat_img = Image.open(cat_img_path) # Open cat image
axes[0].imshow(cat_img) # Display cat image
axes[0].set_title('Cat Image') # Set title for the subplot
# Plot dog image
dog_img = Image.open(dog_img_path) # Open dog image
axes[1].imshow(dog_img) # Display dog image
axes[1].set_title('Dog Image') # Set title for the subplot
# Show the plot
plt.show()
# Loading the training dataset from the specified directory
train_dataset = image_dataset_from_directory(
    data_folder / "train", # Path to the training data folder
    image_size=(256, 256), # Resizing images to (256, 256) pixels
    batch_size=32 # Batch size for training
)
# Loading the validation dataset from the specified directory
validation_dataset = image_dataset_from_directory(
    data_folder / "validation",
    image_size=(256, 256),
    batch_size=32
)
# Loading the test dataset from the specified directory
# NOTE(review): no shuffle argument is passed; if this dataset shuffles by
# default, any code that pairs labels with predictions across separate
# iterations will be misaligned — confirm and consider shuffle=False here.
test_dataset = image_dataset_from_directory(
    data_folder / "test",
    image_size=(256, 256),
    batch_size=32
)
Found 5600 files belonging to 2 classes. Found 1200 files belonging to 2 classes. Found 1200 files belonging to 2 classes.
# Re-importing keras and layers (already imported above; kept so this
# notebook cell is self-contained)
import keras
from keras import layers
# Define the input shape for the model: 256x256 RGB images
inputs = keras.Input(shape=(256, 256, 3))
# Rescale input values from [0, 255] to [0, 1]
x = layers.Rescaling(1./255)(inputs)
# Convolutional Block 1
x = layers.Conv2D(filters=32, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
# Convolutional Block 2
x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
# Convolutional Block 3
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
# Convolutional Block 4
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
# Convolutional Block 5 (no pooling after this one)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
# Flatten the output from convolutional layers for dense layers
x = layers.Flatten()(x)
# Dense classifier head with batch normalization and dropout for regularization
x = layers.Dense(512, activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.5)(x)
# Output layer: a single neuron with sigmoid activation for binary
# classification (the original comment claimed "2 neurons and softmax",
# which did not match this code)
outputs = layers.Dense(1, activation="sigmoid")(x)
# Define the model using functional API, specifying inputs and outputs
model = keras.Model(inputs=inputs, outputs=outputs)
model.summary()
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) [(None, 256, 256, 3)] 0
rescaling_1 (Rescaling) (None, 256, 256, 3) 0
conv2d_5 (Conv2D) (None, 254, 254, 32) 896
max_pooling2d_4 (MaxPoolin (None, 127, 127, 32) 0
g2D)
conv2d_6 (Conv2D) (None, 125, 125, 64) 18496
max_pooling2d_5 (MaxPoolin (None, 62, 62, 64) 0
g2D)
conv2d_7 (Conv2D) (None, 60, 60, 128) 73856
max_pooling2d_6 (MaxPoolin (None, 30, 30, 128) 0
g2D)
conv2d_8 (Conv2D) (None, 28, 28, 256) 295168
max_pooling2d_7 (MaxPoolin (None, 14, 14, 256) 0
g2D)
conv2d_9 (Conv2D) (None, 12, 12, 256) 590080
flatten_1 (Flatten) (None, 36864) 0
dense_2 (Dense) (None, 512) 18874880
batch_normalization_1 (Bat (None, 512) 2048
chNormalization)
dropout_1 (Dropout) (None, 512) 0
dense_3 (Dense) (None, 1) 513
=================================================================
Total params: 19855937 (75.74 MB)
Trainable params: 19854913 (75.74 MB)
Non-trainable params: 1024 (4.00 KB)
_________________________________________________________________
# Compile the model with binary crossentropy loss (matches the single
# sigmoid output), RMSprop optimizer, and accuracy metric
model.compile(loss="binary_crossentropy",
              optimizer="rmsprop",
              metrics=["accuracy"])
# Create ModelCheckpoint callback to save the best model based on validation loss
model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath="./models/convnet_from_scratch.keras",
    save_best_only=True, # Save only the model with the lowest val_loss so far
    monitor="val_loss" # Monitor validation loss
)
# Train the model with the ModelCheckpoint callback
history = model.fit(
    train_dataset,
    epochs=25,
    validation_data=validation_dataset,
    callbacks=model_checkpoint_callback
)
Epoch 1/25 175/175 [==============================] - 31s 106ms/step - loss: 0.7672 - accuracy: 0.5539 - val_loss: 1.1085 - val_accuracy: 0.5333 Epoch 2/25 175/175 [==============================] - 15s 83ms/step - loss: 0.6746 - accuracy: 0.6202 - val_loss: 1.1623 - val_accuracy: 0.5217 Epoch 3/25 175/175 [==============================] - 17s 92ms/step - loss: 0.5932 - accuracy: 0.6834 - val_loss: 0.5850 - val_accuracy: 0.6917 Epoch 4/25 175/175 [==============================] - 16s 89ms/step - loss: 0.5604 - accuracy: 0.7198 - val_loss: 0.5831 - val_accuracy: 0.6717 Epoch 5/25 175/175 [==============================] - 15s 85ms/step - loss: 0.5260 - accuracy: 0.7411 - val_loss: 0.6933 - val_accuracy: 0.6483 Epoch 6/25 175/175 [==============================] - 17s 93ms/step - loss: 0.4904 - accuracy: 0.7693 - val_loss: 0.5521 - val_accuracy: 0.7042 Epoch 7/25 175/175 [==============================] - 16s 91ms/step - loss: 0.4638 - accuracy: 0.7862 - val_loss: 0.5984 - val_accuracy: 0.7133 Epoch 8/25 175/175 [==============================] - 16s 90ms/step - loss: 0.4369 - accuracy: 0.8005 - val_loss: 0.5054 - val_accuracy: 0.7783 Epoch 9/25 175/175 [==============================] - 16s 89ms/step - loss: 0.4009 - accuracy: 0.8205 - val_loss: 0.4548 - val_accuracy: 0.7867 Epoch 10/25 175/175 [==============================] - 17s 92ms/step - loss: 0.3754 - accuracy: 0.8366 - val_loss: 0.4211 - val_accuracy: 0.8000 Epoch 11/25 175/175 [==============================] - 15s 86ms/step - loss: 0.3380 - accuracy: 0.8539 - val_loss: 0.4291 - val_accuracy: 0.8075 Epoch 12/25 175/175 [==============================] - 16s 91ms/step - loss: 0.3335 - accuracy: 0.8504 - val_loss: 0.4510 - val_accuracy: 0.8133 Epoch 13/25 175/175 [==============================] - 15s 85ms/step - loss: 0.2793 - accuracy: 0.8793 - val_loss: 0.4256 - val_accuracy: 0.8233 Epoch 14/25 175/175 [==============================] - 16s 90ms/step - loss: 0.2442 - accuracy: 0.8996 - val_loss: 0.4389 
- val_accuracy: 0.8242 Epoch 15/25 175/175 [==============================] - 16s 88ms/step - loss: 0.1914 - accuracy: 0.9216 - val_loss: 0.5623 - val_accuracy: 0.8033 Epoch 16/25 175/175 [==============================] - 15s 87ms/step - loss: 0.1655 - accuracy: 0.9350 - val_loss: 0.8706 - val_accuracy: 0.7608 Epoch 17/25 175/175 [==============================] - 16s 87ms/step - loss: 0.1405 - accuracy: 0.9434 - val_loss: 0.6880 - val_accuracy: 0.7967 Epoch 18/25 175/175 [==============================] - 15s 85ms/step - loss: 0.1179 - accuracy: 0.9543 - val_loss: 0.5652 - val_accuracy: 0.8392 Epoch 19/25 175/175 [==============================] - 16s 91ms/step - loss: 0.0851 - accuracy: 0.9680 - val_loss: 1.3662 - val_accuracy: 0.7000 Epoch 20/25 175/175 [==============================] - 15s 86ms/step - loss: 0.0791 - accuracy: 0.9691 - val_loss: 1.1654 - val_accuracy: 0.7825 Epoch 21/25 175/175 [==============================] - 15s 85ms/step - loss: 0.0753 - accuracy: 0.9730 - val_loss: 0.5216 - val_accuracy: 0.8433 Epoch 22/25 175/175 [==============================] - 15s 86ms/step - loss: 0.0648 - accuracy: 0.9761 - val_loss: 2.8778 - val_accuracy: 0.6717 Epoch 23/25 175/175 [==============================] - 16s 87ms/step - loss: 0.0614 - accuracy: 0.9775 - val_loss: 0.7173 - val_accuracy: 0.8400 Epoch 24/25 175/175 [==============================] - 15s 86ms/step - loss: 0.0491 - accuracy: 0.9816 - val_loss: 0.7711 - val_accuracy: 0.8208 Epoch 25/25 175/175 [==============================] - 15s 86ms/step - loss: 0.0540 - accuracy: 0.9820 - val_loss: 0.8487 - val_accuracy: 0.8283
def plot_model_history(model_history, acc='accuracy', val_acc='val_accuracy'):
    """
    Plot training history: accuracy curves on the left, loss curves on the right.
    Parameters:
    model_history (History): History object returned by model.fit()
    acc (str): Name of the training accuracy metric
    val_acc (str): Name of the validation accuracy metric
    """
    hist = model_history.history
    fig, axs = plt.subplots(1, 2, figsize=(15, 5))
    # (axis, train metric key, validation metric key, title, y label)
    panels = [
        (axs[0], acc, val_acc, 'Model Accuracy', 'Accuracy'),
        (axs[1], 'loss', 'val_loss', 'Model Loss', 'Loss'),
    ]
    for ax, train_key, val_key, title, ylabel in panels:
        # Epochs are 1-indexed on the x axis.
        ax.plot(range(1, len(hist[train_key]) + 1), hist[train_key])
        ax.plot(range(1, len(hist[val_key]) + 1), hist[val_key])
        ax.set_title(title)
        ax.set_ylabel(ylabel)
        ax.set_xlabel('Epoch')
        ax.set_xticks(np.arange(1, len(hist[train_key]) + 1))
        ax.legend(['train', 'val'], loc='best')
    plt.show()
plot_model_history(history)
From the graphs above, the best epoch for the Vanilla Model is epoch 14, based on the accuracy and loss curves.
After epoch 14, the validation metrics fluctuate a lot, suggesting the model begins to overfit.
# Load the VGG16 model pre-trained on ImageNet data, excluding the fully-connected layers at the top
conv_base = keras.applications.vgg16.VGG16(
    weights="imagenet", # Load pre-trained weights from ImageNet
    include_top=False, # Exclude the fully-connected layers at the top
    input_shape=(256, 256, 3) # Match the dataset's 256x256 RGB images
)
# Freeze the convolutional base so its ImageNet weights are not updated
# during training (feature extraction, not fine-tuning)
conv_base.trainable = False
# Display a summary of the convolutional base (VGG16) architecture
conv_base.summary()
Model: "vgg16"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_7 (InputLayer) [(None, 256, 256, 3)] 0
block1_conv1 (Conv2D) (None, 256, 256, 64) 1792
block1_conv2 (Conv2D) (None, 256, 256, 64) 36928
block1_pool (MaxPooling2D) (None, 128, 128, 64) 0
block2_conv1 (Conv2D) (None, 128, 128, 128) 73856
block2_conv2 (Conv2D) (None, 128, 128, 128) 147584
block2_pool (MaxPooling2D) (None, 64, 64, 128) 0
block3_conv1 (Conv2D) (None, 64, 64, 256) 295168
block3_conv2 (Conv2D) (None, 64, 64, 256) 590080
block3_conv3 (Conv2D) (None, 64, 64, 256) 590080
block3_pool (MaxPooling2D) (None, 32, 32, 256) 0
block4_conv1 (Conv2D) (None, 32, 32, 512) 1180160
block4_conv2 (Conv2D) (None, 32, 32, 512) 2359808
block4_conv3 (Conv2D) (None, 32, 32, 512) 2359808
block4_pool (MaxPooling2D) (None, 16, 16, 512) 0
block5_conv1 (Conv2D) (None, 16, 16, 512) 2359808
block5_conv2 (Conv2D) (None, 16, 16, 512) 2359808
block5_conv3 (Conv2D) (None, 16, 16, 512) 2359808
block5_pool (MaxPooling2D) (None, 8, 8, 512) 0
=================================================================
Total params: 14714688 (56.13 MB)
Trainable params: 0 (0.00 Byte)
Non-trainable params: 14714688 (56.13 MB)
_________________________________________________________________
# Define data augmentation pipeline using Keras Sequential API
data_augmentation = keras.Sequential(
    [
        # Randomly flip images horizontally
        layers.RandomFlip("horizontal"),
        # Randomly rotate images by up to +/-10% of a full circle (~36 degrees);
        # RandomRotation's factor is a fraction of 2*pi, not radians
        layers.RandomRotation(0.1),
        # Randomly zoom into images by a maximum of 20%
        layers.RandomZoom(0.2),
    ]
)
inputs = keras.Input(shape=(256, 256, 3))
# Apply data augmentation to the input data
x = data_augmentation(inputs)
# Preprocess the input data using VGG16's preprocess_input function
# (note: applied inside the model, so raw 0-255 pixels are the expected input)
x = keras.applications.vgg16.preprocess_input(x)
# Feature Extraction
# Pass preprocessed data through the frozen convolutional base (VGG16)
x = conv_base(x)
# Flatten the output from the convolutional base
x = layers.Flatten()(x)
# Classifier Head
# Add a dense layer with 256 units
# NOTE(review): no activation is specified, so this layer is linear; the
# original comment claimed ReLU — if that was the intent, add activation="relu"
x = layers.Dense(256)(x)
# Apply dropout with a rate of 0.5 to prevent overfitting
x = layers.Dropout(0.5)(x)
# Add a dense layer with a single unit and sigmoid activation for binary classification
outputs = layers.Dense(1, activation="sigmoid")(x)
# Define the model with inputs and outputs
model_vgg = keras.Model(inputs, outputs)
# Display a summary of the model architecture and parameters
model_vgg.summary()
Model: "model_4"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_8 (InputLayer) [(None, 256, 256, 3)] 0
sequential_1 (Sequential) (None, 256, 256, 3) 0
tf.__operators__.getitem_2 (None, 256, 256, 3) 0
(SlicingOpLambda)
tf.nn.bias_add_2 (TFOpLamb (None, 256, 256, 3) 0
da)
vgg16 (Functional) (None, 8, 8, 512) 14714688
flatten_4 (Flatten) (None, 32768) 0
dense_8 (Dense) (None, 256) 8388864
dropout_4 (Dropout) (None, 256) 0
dense_9 (Dense) (None, 1) 257
=================================================================
Total params: 23103809 (88.13 MB)
Trainable params: 8389121 (32.00 MB)
Non-trainable params: 14714688 (56.13 MB)
_________________________________________________________________
# Compile the transfer-learning model with binary crossentropy (matching the
# single sigmoid output), the Adam optimizer, and accuracy as the metric
model_vgg.compile(loss="binary_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])
# Custom callback to save the best model weights
class SaveBestModelCallback(callbacks.Callback):
    """Save the model's weights whenever validation loss reaches a new best."""

    def __init__(self, filepath):
        super().__init__()
        self.filepath = filepath           # where the weights file is written
        self.best_val_loss = float('inf')  # lowest val_loss observed so far

    def on_epoch_end(self, epoch, logs=None):
        # Save only when val_loss is reported and strictly improves on the best.
        current = logs.get('val_loss')
        if current is not None and current < self.best_val_loss:
            self.best_val_loss = current
            self.model.save_weights(self.filepath)
# Create custom callback instance
# NOTE(review): this callback saves weights only (save_weights), not a full
# model; the file must be restored with load_weights into the same
# architecture, not with load_model
model_checkpoint_callback = SaveBestModelCallback(filepath="./models/vgg16_best_weights.hdf5")
# Train the model
history = model_vgg.fit(
    train_dataset,
    epochs=25,
    validation_data=validation_dataset,
    callbacks=[model_checkpoint_callback]
)
Epoch 1/25 175/175 [==============================] - 41s 223ms/step - loss: 1.9406 - accuracy: 0.9252 - val_loss: 0.1307 - val_accuracy: 0.9750 Epoch 2/25 175/175 [==============================] - 39s 221ms/step - loss: 0.1223 - accuracy: 0.9579 - val_loss: 0.0818 - val_accuracy: 0.9817 Epoch 3/25 175/175 [==============================] - 39s 221ms/step - loss: 0.1149 - accuracy: 0.9636 - val_loss: 0.0708 - val_accuracy: 0.9767 Epoch 4/25 175/175 [==============================] - 39s 222ms/step - loss: 0.1257 - accuracy: 0.9646 - val_loss: 0.0495 - val_accuracy: 0.9817 Epoch 5/25 175/175 [==============================] - 39s 217ms/step - loss: 0.0964 - accuracy: 0.9677 - val_loss: 0.0586 - val_accuracy: 0.9833 Epoch 6/25 175/175 [==============================] - 38s 219ms/step - loss: 0.0928 - accuracy: 0.9704 - val_loss: 0.0566 - val_accuracy: 0.9833 Epoch 7/25 175/175 [==============================] - 42s 238ms/step - loss: 0.1123 - accuracy: 0.9695 - val_loss: 0.0562 - val_accuracy: 0.9825 Epoch 8/25 175/175 [==============================] - 41s 232ms/step - loss: 0.0759 - accuracy: 0.9755 - val_loss: 0.0459 - val_accuracy: 0.9875 Epoch 9/25 175/175 [==============================] - 40s 224ms/step - loss: 0.0768 - accuracy: 0.9739 - val_loss: 0.0381 - val_accuracy: 0.9867 Epoch 10/25 175/175 [==============================] - 43s 241ms/step - loss: 0.0680 - accuracy: 0.9746 - val_loss: 0.0568 - val_accuracy: 0.9817 Epoch 11/25 175/175 [==============================] - 40s 224ms/step - loss: 0.0714 - accuracy: 0.9761 - val_loss: 0.0751 - val_accuracy: 0.9817 Epoch 12/25 175/175 [==============================] - 38s 218ms/step - loss: 0.0615 - accuracy: 0.9802 - val_loss: 0.0415 - val_accuracy: 0.9833 Epoch 13/25 175/175 [==============================] - 42s 236ms/step - loss: 0.0676 - accuracy: 0.9766 - val_loss: 0.0575 - val_accuracy: 0.9833 Epoch 14/25 175/175 [==============================] - 38s 218ms/step - loss: 0.0519 - accuracy: 0.9820 - 
val_loss: 0.0469 - val_accuracy: 0.9850 Epoch 15/25 175/175 [==============================] - 38s 217ms/step - loss: 0.0564 - accuracy: 0.9823 - val_loss: 0.0785 - val_accuracy: 0.9833 Epoch 16/25 175/175 [==============================] - 38s 218ms/step - loss: 0.0606 - accuracy: 0.9814 - val_loss: 0.0553 - val_accuracy: 0.9833 Epoch 17/25 175/175 [==============================] - 42s 238ms/step - loss: 0.0501 - accuracy: 0.9820 - val_loss: 0.0636 - val_accuracy: 0.9842 Epoch 18/25 175/175 [==============================] - 39s 219ms/step - loss: 0.0448 - accuracy: 0.9854 - val_loss: 0.0576 - val_accuracy: 0.9850 Epoch 19/25 175/175 [==============================] - 38s 217ms/step - loss: 0.0457 - accuracy: 0.9866 - val_loss: 0.0492 - val_accuracy: 0.9850 Epoch 20/25 175/175 [==============================] - 42s 238ms/step - loss: 0.0536 - accuracy: 0.9846 - val_loss: 0.1110 - val_accuracy: 0.9800 Epoch 21/25 175/175 [==============================] - 39s 220ms/step - loss: 0.0564 - accuracy: 0.9811 - val_loss: 0.0590 - val_accuracy: 0.9900 Epoch 22/25 175/175 [==============================] - 38s 216ms/step - loss: 0.0546 - accuracy: 0.9816 - val_loss: 0.0452 - val_accuracy: 0.9858 Epoch 23/25 175/175 [==============================] - 42s 237ms/step - loss: 0.0543 - accuracy: 0.9825 - val_loss: 0.0577 - val_accuracy: 0.9858 Epoch 24/25 175/175 [==============================] - 42s 238ms/step - loss: 0.0540 - accuracy: 0.9811 - val_loss: 0.0876 - val_accuracy: 0.9833 Epoch 25/25 175/175 [==============================] - 38s 218ms/step - loss: 0.0439 - accuracy: 0.9839 - val_loss: 0.0506 - val_accuracy: 0.9900
plot_model_history(history)
From the graphs above, the best epoch for the VGG Model is epoch 14, based on the accuracy and loss curves.
After epoch 14, the validation metrics fluctuate a lot, suggesting the model begins to overfit.
# Define paths and batch size
test_data_dir = './train/kaggle_dogs_vs_cats_small/test' # Directory containing test data
best_model_1_path = './models/convnet_from_scratch.keras' # Full model saved by ModelCheckpoint
best_model_2_path = './models/vgg16_best_weights.hdf5' # Weights-only file saved by SaveBestModelCallback
batch_size = 32 # Batch size for evaluation
# Load the best versions of each model
vanilla_model_best = load_model(best_model_1_path) # ModelCheckpoint saved a complete model
# BUG FIX: best_model_2_path holds only weights (written via save_weights in
# SaveBestModelCallback), which load_model cannot restore into a model object.
# Load the weights into the existing architecture instead.
vgg_model_best = model_vgg
vgg_model_best.load_weights(best_model_2_path)
# Evaluate the from-scratch model on the test data
test_loss, test_acc = vanilla_model_best.evaluate(test_dataset)
# Print the results
print("Vanilla Model Evaluation:")
print(f"Test loss: {test_loss:.4f}")
print(f"Test accuracy: {test_acc:.4f}")
38/38 [==============================] - 2s 42ms/step - loss: 0.4577 - accuracy: 0.7942 Vanilla Model Evaluation: Test loss: 0.4577 Test accuracy: 0.7942
# Evaluate the transfer-learning (VGG) model on the test data
test_loss, test_acc = vgg_model_best.evaluate(test_dataset)
# Print the results
print("VGG Model Evaluation:")
print(f"Test loss: {test_loss:.4f}")
print(f"Test accuracy: {test_acc:.4f}")
38/38 [==============================] - 7s 165ms/step - loss: 0.0749 - accuracy: 0.9825 VGG Model Evaluation: Test loss: 0.0749 Test accuracy: 0.9825
import numpy as np
# BUG FIX: the original code collected y_true in one pass over test_dataset and
# then called predict(test_dataset) in two further, separate passes. Because
# the dataset was created without shuffle=False, each pass can yield the
# batches in a different order, misaligning labels and predictions — which is
# why the classification reports below showed near-random (~50%) accuracy
# while model.evaluate reported ~80%/~98%. Collect labels and predictions
# batch-by-batch in a single pass so they stay aligned.
y_true = []
y_pred_model_1 = []
y_pred_model_2 = []
for images, labels in test_dataset:
    y_true.extend(labels.numpy()) # true labels for this batch
    # Predict on the exact same batch for both models
    y_pred_model_1.extend(np.squeeze(vanilla_model_best.predict(images, verbose=0), axis=-1))
    y_pred_model_2.extend(np.squeeze(vgg_model_best.predict(images, verbose=0), axis=-1))
# Convert to NumPy arrays for the metric computations below
y_true = np.array(y_true)
y_pred_model_1 = np.array(y_pred_model_1)
y_pred_model_2 = np.array(y_pred_model_2)
38/38 [==============================] - 2s 44ms/step 38/38 [==============================] - 7s 168ms/step
# Calculate confusion matrices (sigmoid scores binarized at 0.5)
cm_model_1 = confusion_matrix(y_true, y_pred_model_1 > 0.5) # Confusion matrix for model 1
cm_model_2 = confusion_matrix(y_true, y_pred_model_2 > 0.5) # Confusion matrix for model 2
# Plot confusion matrices side by side
plt.figure(figsize=(12, 6))
# Subplot for Model 1 confusion matrix
plt.subplot(1, 2, 1)
plt.title("Confusion Matrix - Vanilla Model")
plt.imshow(cm_model_1, cmap=plt.cm.Blues, interpolation='nearest')
plt.colorbar()
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.xticks([0, 1], ["Cat", "Dog"]) # class 0 = cat, class 1 = dog
plt.yticks([0, 1], ["Cat", "Dog"])
# Subplot for Model 2 confusion matrix
plt.subplot(1, 2, 2)
plt.title("Confusion Matrix - VGG Model")
plt.imshow(cm_model_2, cmap=plt.cm.Blues, interpolation='nearest')
plt.colorbar()
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.xticks([0, 1], ["Cat", "Dog"])
plt.yticks([0, 1], ["Cat", "Dog"])
plt.show()
# Calculate precision, recall, and F1-score for both models
# BUG FIX: the confusion matrices above (and binary_predictions later) binarize
# at 0.5, but these reports used an inconsistent 0.7 threshold; use 0.5
# everywhere so all the metrics describe the same decision rule.
print("\nModel 1 Classification Report:")
print(classification_report(y_true, y_pred_model_1 > 0.5, target_names=['Cat', 'Dog']))
print("\nModel 2 Classification Report:")
print(classification_report(y_true, y_pred_model_2 > 0.5, target_names=['Cat', 'Dog']))
Model 1 Classification Report:
precision recall f1-score support
Cat 0.51 0.73 0.60 600
Dog 0.53 0.30 0.39 600
accuracy 0.52 1200
macro avg 0.52 0.52 0.49 1200
weighted avg 0.52 0.52 0.49 1200
Model 2 Classification Report:
precision recall f1-score support
Cat 0.48 0.49 0.49 600
Dog 0.48 0.48 0.48 600
accuracy 0.48 1200
macro avg 0.48 0.48 0.48 1200
weighted avg 0.48 0.48 0.48 1200
# Calculate precision-recall curve for both models (uses the raw scores,
# so it is threshold-independent)
precision_model_1, recall_model_1, _ = precision_recall_curve(y_true, y_pred_model_1)
precision_model_2, recall_model_2, _ = precision_recall_curve(y_true, y_pred_model_2)
# Plot precision-recall curve for both models on the same axes
plt.figure(figsize=(8, 6))
plt.plot(recall_model_1, precision_model_1, label='Vanilla Model')
plt.plot(recall_model_2, precision_model_2, label='VGG Model')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend()
plt.grid(True)
plt.show()
test_dir = "./train/kaggle_dogs_vs_cats_small/test"
# List to store the category-relative paths of all test images
test_filenames = []
# Get the filenames of all images in the "cat" and "dog" subdirectories
for category in ["cat", "dog"]:
    category_dir = os.path.join(test_dir, category)
    filenames = os.listdir(category_dir)
    # Append the category-relative paths to the list of test filenames
    test_filenames.extend([os.path.join(category, fname) for fname in filenames])
# Make predictions for each image
predictions = []
for filename in test_filenames:
    img_path = os.path.join(test_dir, filename)
    # Load the image at the size the model was trained on
    img = load_img(img_path, target_size=(256, 256))
    # BUG FIX: model_vgg applies vgg16.preprocess_input internally and was
    # trained on raw 0-255 pixel batches from image_dataset_from_directory.
    # Dividing by 255 here preprocessed the inputs twice and skewed the
    # predictions, so feed raw pixel values instead.
    img_array = np.expand_dims(np.asarray(img, dtype=np.float32), axis=0)
    # Make prediction using the model
    prediction = vgg_model_best.predict(img_array, verbose=0)
    predictions.append(prediction)
threshold = 0.5
# Convert predictions to binary categories based on the threshold
binary_predictions = [1 if pred > threshold else 0 for pred in predictions]
# Show the first nine test images with the VGG model's predicted labels
plt.figure(figsize=(12, 12))
for i, filename in enumerate(test_filenames[:9]):
    img_path = os.path.join(test_dir, filename)
    # Load the image
    img = load_img(img_path, target_size=(256, 256))
    plt.subplot(3, 3, i+1)
    plt.imshow(img)
    # Map the binary prediction back to a class name (0 = cat, 1 = dog)
    if binary_predictions[i] == 0:
        label = "cat"
    else:
        label = "dog"
    plt.title(f"Prediction: {label}")
    plt.axis("off")
plt.tight_layout()
plt.show()
def get_binary_predictions(y_pred, threshold):
    """Binarize model scores: 1 where the score exceeds threshold, else 0."""
    above_threshold = y_pred > threshold
    return above_threshold.astype(int)
def calculate_incorrect_predictions(predictions, y_true, filenames):
    """Collect the cases where the predicted class differs from the true class.

    Args:
    - predictions: binary predictions (1 = dog, 0 = cat), aligned with filenames.
    - y_true: ground-truth binary labels, aligned with filenames.
    - filenames: image identifiers.
    Returns:
    - list of (filename, actual_label, predicted_label) tuples for mismatches.
    """
    mistakes = []
    for i, fname in enumerate(filenames):
        actual = "dog" if y_true[i] == 1 else "cat"
        guessed = "dog" if predictions[i] == 1 else "cat"
        if actual != guessed:
            mistakes.append((fname, actual, guessed))
    return mistakes
# Get binary predictions for both models
binary_predictions_model_1 = get_binary_predictions(y_pred_model_1, threshold)
binary_predictions_model_2 = get_binary_predictions(y_pred_model_2, threshold)
# BUG FIX: datasets returned by image_dataset_from_directory expose the source
# paths via the `file_paths` attribute; `.filenames` (an ImageDataGenerator
# attribute) does not exist here and raises AttributeError.
# NOTE(review): y_true/y_pred were collected while iterating the dataset; if
# the dataset shuffles, file_paths order may not match that iteration —
# confirm shuffle=False was used for the test split.
test_file_paths = test_dataset.file_paths
# Calculate incorrect predictions for both models
incorrect_predictions_model_1 = calculate_incorrect_predictions(binary_predictions_model_1, y_true, test_file_paths)
incorrect_predictions_model_2 = calculate_incorrect_predictions(binary_predictions_model_2, y_true, test_file_paths)
# Display information about incorrect predictions for both models
print("Number of Incorrect Predictions")
print(f"Vanilla Model: {len(incorrect_predictions_model_1)} ")
print(f"VGG Model: {len(incorrect_predictions_model_2)} ")
Number of Incorrect Predictions Vanilla Model: 587 VGG Model: 619
num_incorrect_plot = 5 # Number of misclassified images to display per model
# Function to plot incorrect images
def plot_incorrect_images(incorrect_predictions, model_name):
    """Display the first few misclassified images with actual vs. predicted labels.

    Args:
    - incorrect_predictions (list): (filename, actual_label, predicted_label) tuples.
    - model_name (str): Model name used in the figure title.
    """
    plt.figure(figsize=(12, 10))
    plt.suptitle(f"Incorrect Predictions for {model_name}", fontsize=16)
    for i, (filename, actual_label, predicted_label) in enumerate(incorrect_predictions[:num_incorrect_plot]):
        # NOTE(review): this joins original_dir with the stored filename, which
        # only resolves if filename is a bare name present in original_dir;
        # entries derived from the test dataset's paths would not be — verify.
        img_path = os.path.join(original_dir, filename)
        img = load_img(img_path, target_size=(150, 150))
        plt.subplot(2, num_incorrect_plot, i + 1)
        plt.imshow(img)
        plt.title(f"Actual: {actual_label}\nPredicted: {predicted_label}")
        plt.axis('off')
    plt.tight_layout()
    plt.show()
plot_incorrect_images(incorrect_predictions_model_1, "Vanilla Model")
plot_incorrect_images(incorrect_predictions_model_2, "VGG Model")
From the above visualisations, we can see that both models misclassify dogs the majority of the time. This suggests the models may have overfit to the cat images and are therefore prone to predicting images of dogs as cats as well.
In This notebook we present a comprehensive evaluation of two image classification models designed to classify images of cats and dogs. The evaluation compares the performance of a Vanilla Model trained from scratch with a VGG Model pretrained on the ImageNet dataset. Through this analysis, we aim to understand the effectiveness of transfer learning in improving classification accuracy and explore the strengths and limitations of each model architecture.
Two distinct model architectures are trained on the dataset:
A helper function, plot_model_history, is defined to visualize model loss and accuracy over epochs.

Model Training and Evaluation:
Confusion Matrices:
After training, both models were evaluated on the test dataset.
In general Transfer learning with VGG16 tends to provide better results, especially with limited training data, as it captures more complex features learned from the ImageNet dataset. In this case VGG 16 was slightly better than the Vanilla Model which indicates that there is still room for improving the model by fine-tuning the hyperparameters, such as learning rate or dropout rate, allowing us to achieve higher accuracy on the dataset.